knitr::opts_knit$set(root.dir = '~/Documents/bgs_sims/')
Simulated data from \(N_e=10,000\) with bottleneck and growth following Beissinger et al. 2016. Munged data will be neutral \(\pi\) and \(\xi_1\) (singletons) shown every 100 generations in each of 24 4Kb windows centered around a 4kb “gene” which has neutral () and deleterious () mutations. Deleterious mutations are drawn from a gamma with some mean s (see below).
library(tidyverse)
library(ggjoy)
library(viridis)
library(cowplot)
For Europeans, we follow a slightly modified model of Torres et al. 2017 but the gene model is as above. Our demographic model is
read_sim<-function(pidata,xidata,neutral=FALSE){
#read data
pi<-as.tibble(read.csv(pidata,header=F))
pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
xi<-as.tibble(read.csv(xidata,header=F))
xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
#munge into single tibble of means
data<-rbind(xi,pi) %>%
`colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
gather(window,value,2:26) %>%
mutate(gen=gen-100000) %>%
group_by(gen,stat,window) %>%
summarize(mean_val=mean(value)) %>%
mutate(window=(as.numeric(window)-13)*4000*3E-5,mean_val=mean_val)
data=mutate(data,mean_val=if_else(window==0.00,mean_val*4,mean_val))
if(neutral==TRUE){
data=mutate(data,mean_val=if_else(window==0.00,mean_val/4,mean_val))
}
return(data)
}
simplot<-function(somedata,mystat,label,bneck){
ggplot(filter(somedata,gen %% 100 <1,stat==mystat,gen>bneck), aes(x=window, y=mean_val, group = gen,color=gen)) +
scale_color_viridis(name='Generation') +
background_grid(major = "xy", minor = "none") +
geom_line() +
ylab(label) + xlab("cM") +
scale_x_continuous(breaks = seq(-1.5,1.5,0.5))
}
merge_sel_neutral <- function(sel_sim_data,neut_sim_data){
merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','stat','window'))
merged$mean_val <- merged$mean_val.x/merged$mean_val.y
merged
}
setwd('~/Documents/bgs_sims/')
maize_neutral <-read_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
head(maize_neutral)
## Source: local data frame [6 x 4]
## Groups: gen, stat [1]
##
## # A tibble: 6 x 4
## gen stat window mean_val
## <dbl> <chr> <dbl> <dbl>
## 1 200 pi -1.44 47.63346
## 2 200 pi -0.36 48.47615
## 3 200 pi -0.24 46.79799
## 4 200 pi -0.12 48.13605
## 5 200 pi 0.00 47.08144
## 6 200 pi 0.12 47.98168
unique(maize_neutral$window)
## [1] -1.44 -0.36 -0.24 -0.12 0.00 0.12 0.24 0.36 0.48 0.60 0.72
## [12] -1.32 0.84 0.96 1.08 1.20 1.32 1.44 -1.20 -1.08 -0.96 -0.84
## [23] -0.72 -0.60 -0.48
simplot(maize_neutral,"pi",expression(bar(pi)),2200)
European_neutral <-read_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
simplot(European_neutral,"pi",expression(bar(pi)),2200)
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
European<-read_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
European<-read_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
European<-read_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
read_single_sim<-function(pidata,xidata,neutral=FALSE){
#read data
pi<-as.tibble(read.csv(pidata,header=F))
pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
xi<-as.tibble(read.csv(xidata,header=F))
xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
#munge into single tibble of means
data<-rbind(xi,pi) %>%
`colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
gather(window,value,2:26) %>%
mutate(gen=gen-100000) %>%
mutate(window=(as.numeric(window)-13)*4000*3E-5)
data=mutate(data,value=if_else(window==0.00,value*4,value))
if(neutral==TRUE){
data=mutate(data,value=if_else(window==0.00,value/4,value))
}
return(data)
}
merge_single_sel_neutral <- function(sel_sim_data,neut_sim_data){
merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','sim','stat','window'))
merged$value <- merged$value.x/merged$value.y
merged
}
sim_anova <- function(somedata,stats){
sep_parm <- filter(somedata,stat==stats,!is.na(value),value != Inf)
sep_parm$gen <- as.factor(sep_parm$gen)
sep_parm$window <- as.factor(sep_parm$window)
fit <- lm(value ~ gen + window + gen*window, data=sep_parm,na.action = na.exclude)
print(anova(fit))
}
maize_neutral <-read_single_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
European_neutral <-read_single_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
maize<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_single_sel_neutral(maize,maize_neutral)
sim_anova(maize_merged,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 0.6 0.018 0.2866 1
## window 24 5592.5 233.021 3722.7011 <2e-16 ***
## gen:window 768 8.1 0.010 0.1675 1
## Residuals 81675 5112.4 0.063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
European<-read_single_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_single_sel_neutral(European,European_neutral)
sim_anova(European_merged,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 2.7 0.084 0.9819 0.4957
## window 24 5714.5 238.104 2784.8759 <2e-16 ***
## gen:window 768 19.4 0.025 0.2950 1.0000
## Residuals 81675 6983.1 0.085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_anova(maize_merged,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 1874 58.556 138.8111 < 2.2e-16 ***
## window 24 47 1.955 4.6348 3.514e-13 ***
## gen:window 768 382 0.497 1.1781 0.0004822 ***
## Residuals 81519 34388 0.422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_anova(European_merged,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 166.5 5.2037 20.1613 < 2.2e-16 ***
## window 24 32.4 1.3508 5.2337 9.943e-16 ***
## gen:window 768 182.5 0.2377 0.9209 0.9418
## Residuals 81675 21080.8 0.2581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
maize<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_single_sel_neutral(maize,maize_neutral)
sim_anova(maize_merged,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 1.4 0.043 0.5005 0.9917
## window 24 1918.5 79.938 926.3525 <2e-16 ***
## gen:window 768 11.1 0.014 0.1672 1.0000
## Residuals 81675 7048.0 0.086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
European<-read_single_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_single_sel_neutral(European,European_neutral)
sim_anova(European_merged,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 6.6 0.205 1.7479 0.00553 **
## window 24 1889.0 78.708 670.7534 < 2e-16 ***
## gen:window 768 19.5 0.025 0.2164 1.00000
## Residuals 81675 9583.9 0.117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_anova(maize_merged,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 2195 68.594 146.0606 <2e-16 ***
## window 24 10 0.407 0.8665 0.6507
## gen:window 768 338 0.440 0.9372 0.8915
## Residuals 81519 38283 0.470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sim_anova(European_merged,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 142.5 4.4520 16.4619 < 2.2e-16 ***
## window 24 25.8 1.0749 3.9745 1.846e-10 ***
## gen:window 768 190.9 0.2486 0.9193 0.9454
## Residuals 81675 22088.4 0.2704
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1